import jieba
# Exclusive upper bound for the numbered input files: the script reads
# data/1.txt through data/<FILE_NUM - 1>.txt.
# NOTE(review): if FILE_NUM is meant to be the number of files, the last file
# is currently skipped -- confirm against the contents of data/.
FILE_NUM = 1885
if __name__ == "__main__":
    # Segment every data/<n>.txt with jieba, drop tokens that appear in
    # StopWord.txt, and write the surviving tokens (each followed by a single
    # space) both to cut/<n>.txt and to the combined TotalCut.txt.

    # Load the stopwords first so that a missing StopWord.txt fails before
    # TotalCut.txt is truncated. A set gives O(1) membership tests in the
    # per-token filter below; only the line terminator is stripped, so
    # stopwords containing spaces are kept as written.
    with open("StopWord.txt", 'r', encoding='utf-8') as stopword_file:
        stopwords = {line.replace('\n', '') for line in stopword_file}

    with open("TotalCut.txt", 'w', encoding='utf-8') as total_file:
        for file_index in range(1, FILE_NUM):
            print("------ ", file_index, " ------")
            filename = str(file_index) + ".txt"

            with open("data/" + filename, 'r', encoding='utf-8') as f_in:
                # Only the first line of each input file is segmented.
                # NOTE(review): presumably every document is stored on a
                # single line; switch to read() if multi-line inputs exist.
                text_raw = f_in.readline()

            # Keep the original output format: every kept token is followed
            # by one space, including the last one.
            kept_tokens = [
                token for token in jieba.lcut(text_raw)
                if token not in stopwords
            ]
            segmented = ''.join(token + ' ' for token in kept_tokens)

            with open("cut/" + filename, 'w', encoding='utf-8') as f_out:
                f_out.write(segmented)
            total_file.write(segmented)